#!/usr/bin/env python

import sys

from BeautifulSoup import BeautifulSoup

class Parser(object):
    """Extract the body markup and the "next" link from one saved HTML page.

    After parse() has run, ``content`` holds the markup found inside the
    first element whose id is ``ebook_page`` and ``next`` holds the ``href``
    of the last ``<a>`` tag that has a child containing "next".  Either
    attribute stays ``""`` when the page does not provide it.
    """

    def __init__(self, filename):
        # Path of the HTML file that parse() will read.
        self.filename = filename
        # href of the "next" link, stored verbatim (it is not resolved
        # against self.filename); "" until parse() finds one.
        self.next = ""
        # Markup inside the "ebook_page" element; "" until parse() finds it.
        self.content = ""

    def parse(self):
        """Read ``self.filename`` and fill in ``self.next`` and ``self.content``.

        Errors from opening or reading the file propagate to the caller.
        A page without an ``ebook_page`` element is reported on stderr and
        leaves ``self.content`` unchanged.
        """
        stream = open(self.filename)
        try:
            # try/finally releases the handle even when read() fails.
            markup = stream.read()
        finally:
            stream.close()
        soup = BeautifulSoup(markup)

        def _is_next_link(tag):
            # Cheap structural checks first, so the children of tags that
            # are not links are never scanned.
            if tag.name != 'a' or 'href' not in dict(tag.attrs):
                return False
            # True when any direct child satisfies ``'next' in child``
            # (for plain text children this is a substring test).
            return any('next' in child for child in tag.contents)

        next_links = soup.findAll(_is_next_link)
        if next_links:
            # Several links may qualify; the last one in the document wins.
            self.next = dict(next_links[-1].attrs).get("href", "")

        page = soup.findAll(id="ebook_page")
        if not page:
            # No content container on this page: report it and keep going.
            print >>sys.stderr, "DEBUG:", page, dir(page)
            return

        # Serialise every child of the first match and glue them together.
        self.content = u''.join([unicode(node) for node in page[0].contents])

def processor(start):
    """Walk the chain of "next" links from *start*, yielding parsed pages.

    Each file is parsed with Parser; a parser is yielded only when its page
    has a "next" link, and that link becomes the next file to open.  The
    walk stops at the first page without a "next" link, or when a link
    points at a file that was already visited (otherwise a cycle of links
    would never terminate).

    NOTE(review): the page that ends the chain is parsed but not yielded,
    so its content never reaches the caller -- confirm this is intended.
    """
    visited = set()
    filename = start
    while filename and filename not in visited:
        visited.add(filename)
        parser = Parser(filename)
        parser.parse()
        if parser.next:
            yield parser
        filename = parser.next

HEAD = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <title>%s(title)s</title>
        <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
    </head>
""".strip()

res = [HEAD, "<body>"]
for page in processor(sys.argv[1]):
    res.append(page.content)
    res.append("<!-- %s -->" % (page.next,))
res.append("</body>")
res.append("</html>")

s = BeautifulSoup(''.join(res))
print s.prettify()

